############### ###############
## 0M - Master
## Project: CBO
## PI: Malte Lierl
## Written by: Kamil Kouhen & Malte Lierl
## Purpose: Master file
## Date of creation: 2022-01-18
## This version: 2022-11-07
############### ###############

################################################
# Table of contents: 

# 1. Code setup 
# 2. Read data 
# 3. Load user-written functions
# 4. Data cleaning and construction of outcome indicators
# 5. Outputs: figures
# 6. Outputs: tables

################################################


# 1. Code setup --------------- ######

  # Fix the RNG seed so that stochastic steps further down are reproducible.
  seed <- 1234
  set.seed(seed)
  
  ## Informing what type of data the user has chosen ##
  
  # `datatype` is not defined anywhere in this file: it has to be set by the
  # caller before the master script is run. The loading code below only
  # branches on "Data/Unblinded" and "Data/Blinded", so stop right away with a
  # clear message if the value is missing or is anything else.
  if (!exists("datatype")) {
    stop(
      "`datatype` is not defined. Set it to \"Data/Unblinded\" or ",
      "\"Data/Blinded\" before running the master file.",
      call. = FALSE
    )
  }
  if (!is.character(datatype) || length(datatype) != 1L ||
      !datatype %in% c("Data/Unblinded", "Data/Blinded")) {
    stop(
      "`datatype` must be \"Data/Unblinded\" or \"Data/Blinded\", not: ",
      paste(format(datatype), collapse = ", "),
      call. = FALSE
    )
  }
  
  message(paste0("You have chosen to work with: ", datatype))
  
  ## Loading packages from CRAN ##

  # pacman::p_load() installs any package that is missing and then attaches it.
  # Whether pacman itself is installed is checked with requireNamespace(),
  # which only answers that question; require() would attach the package as a
  # side effect and emit a warning when it is absent.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }
  # Attach pacman explicitly so that any unqualified p_load() call in scripts
  # sourced later keeps working (the previous require() call attached it too).
  library(pacman)
  
  # The order of the packages is kept as is: it determines which package masks
  # which when several export the same function name.
  pacman::p_load(
    tidyverse, 
    ggthemes, 
    here, 
    haven, #to read stata files
    readxl, 
    purrr, 
    Hmisc, 
    forcats, 
    labelled, 
    gridExtra,
    janitor, 
    cowplot, 
    glue, 
    epiDisplay, 
    writexl, 
    randomizr, 
    caret, 
    estimatr, #For most hypothesis tests
    MASS, 
    sandwich, 
    lmtest,
    AER, 
    ShapleyValue, 
    tidyr, 
    zoo, 
    corrr, 
    remotes, 
    knitr,
    kableExtra,
    stargazer, 
    forestplot,
    meta, #For meta analysis type objects (to build forest plot)
    stats, #For p-value correction
    margins, #For marginal effects
    magick, #to crop images
    conflicted, #to resolve package conflicts
    rmarkdown,  
    ggpubr,
    devtools, 
    roxygen2, 
    xaringan, 
    xtable, 
    readstata13 #to read stata files
  )
  
  here() #Project root that every here(...) path below is built from
  
  ### Keep printed numbers out of scientific notation
  options(scipen = 100)
  
  ### Resolving the package conflicts that matter for this project
  # Each entry reads: function name = package whose version should win.
  # The preferences are registered one by one, in the order listed.
  purrr::iwalk(
    c(
      select = "dplyr",
      filter = "dplyr",
      summarize = "dplyr",
      label = "Hmisc",
      lag = "dplyr"
    ),
    function(pkg, fn) conflicted::conflict_prefer(fn, pkg)
  )
  
# 2. Read data --------------- ######
  
  ## Source ##
  
    ## LOAD BLINDED/UNBLINDED SOURCE DATA AT THE LEVEL OF CBOs AND MUNICIPAL DECISION MAKERS
    
    ### CBO group level source data
    # The blinded and unblinded files sit in the same folder and differ only in
    # file name. switch() ends in stop() for any other value of `datatype`, so
    # CBO_groups_raw can never be left undefined, or left pointing at an object
    # from an earlier session, because of a mistyped `datatype`.
    CBO_groups_raw <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "CBO_usable",
      switch(
        datatype,
        "Data/Unblinded" = "CBO_GROUP_UNBLINDED.dta",
        "Data/Blinded" = "CBO_GROUP_BLINDED.dta",
        stop("Unknown datatype: ", datatype, call. = FALSE)
      )
    )))
    
    ### CBO group level treatment-related data
    # Same file name for both data types; only the `datatype` folder differs.
    CBO_groups_treatmentvars <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "CBO_usable",
      "CBO_GROUP_TREATMENTVARS.dta"
    )))
    
    ### CBO individual level source data
    # The blinded and unblinded files sit in the same folder and differ only in
    # file name. switch() ends in stop() for any other value of `datatype`, so
    # CBO_individuals_raw can never be left undefined, or left pointing at an
    # object from an earlier session, because of a mistyped `datatype`.
    CBO_individuals_raw <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "CBO_usable",
      switch(
        datatype,
        "Data/Unblinded" = "CBO_INDIVIDUAL_UNBLINDED.dta",
        "Data/Blinded" = "CBO_INDIVIDUAL_BLINDED.dta",
        stop("Unknown datatype: ", datatype, call. = FALSE)
      )
    )))
    
    ### CBO individual level treatment-related data
    # Same file name for both data types; only the `datatype` folder differs.
    CBO_individuals_treatmentvars <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "CBO_usable",
      "CBO_INDIVIDUAL_TREATMENTVARS.dta"
    )))
    
    ### Decision maker level source data
    # The blinded and unblinded files sit in the same folder and differ only in
    # file name. switch() ends in stop() for any other value of `datatype`, so
    # DM_raw can never be left undefined, or left pointing at an object from an
    # earlier session, because of a mistyped `datatype`.
    DM_raw <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "DM_usable",
      switch(
        datatype,
        "Data/Unblinded" = "DM_UNBLINDED.dta",
        "Data/Blinded" = "DM_BLINDED.dta",
        stop("Unknown datatype: ", datatype, call. = FALSE)
      )
    )))
    
    ### Decision maker level treatment-related data
    # Same file name for both data types; only the `datatype` folder differs.
    DM_treatmentvars <- as_tibble(read_dta(here(
      datatype, "Source", "IPA deliverables (modified)", "DM_usable",
      "DM_TREATMENTVARS.dta"
    )))
    
    
    ## LOAD MUNICIPAL PERFORMANCE DATA
    
    ### SUPERMUN panel data (Stata file, read with read.dta13)
    supermun <- here(datatype, "Source", "SUPERMUN", "SUPERMUN Panel 2014-20 with scores.dta") %>%
      read.dta13()
    
    ### Descriptions of the municipal performance indicators (strings stay character)
    indicators <- here(datatype, "Source", "SUPERMUN", "SUPERMUN Indicator Definitions for Analysis.csv") %>%
      read.csv(stringsAsFactors = FALSE)
    
    ## LOAD MUNICIPALITY-LEVEL TREATMENT ASSIGNMENT
    
    # Unblinded runs read the treatment assignment from disk. Blinded runs build
    # mock identifiers instead, one row per region/commune pair in `supermun`.
    # Any other value of `datatype` stops the script rather than leaving
    # treatment.cbo undefined (or stale from an earlier session).
    if (datatype == "Data/Unblinded") {
      # NOTE(review): the file is called DM_UNBLINDED.dta although this section
      # loads municipality-level treatment assignment -- confirm the file name.
      treatment.cbo <- as_tibble(read_dta(here(datatype, "Source", "Treatment Assignment", "DM_UNBLINDED.dta")))
    } else if (datatype == "Data/Blinded") {
      ### GENERATE MUNICIPALITY-LEVEL MOCK TREATMENT IDENTIFIERS FOR BLIND ANALYSIS
      # NOTE(review): this reseeds the global RNG in the blinded branch only, so
      # random draws made later start from a different state in blinded and
      # unblinded runs -- confirm that this is intended.
      set.seed(20220512)
      treatment.cbo <- data.frame(unique(supermun[supermun$year != 2016, c("region", "commune")]))
      
      # Every group size hard-coded below adds up to 349 rows. Check this here
      # so that a change in the SUPERMUN panel fails with a clear message
      # instead of a replacement-length error further down.
      if (nrow(treatment.cbo) != 349L) {
        stop(
          "Mock treatment assignment expects 349 municipalities, found ",
          nrow(treatment.cbo), ".",
          call. = FALSE
        )
      }
      
      # create mock treatment identifiers
      # The order of the sample() calls must not change: each call consumes
      # random numbers, so reordering them would change the mock assignment.
      treatment.cbo$situation <- sample(c(rep("Difficult", 14), rep("Impossible", 19), rep("Possible", 316)))
      treatment.cbo$cboincentives <- sample(c(rep(0, 174), rep(1, 175)))
      # Create the column explicitly (all missing) before filling it by group.
      treatment.cbo$municipality_included <- NA_real_
      treatment.cbo$municipality_included[treatment.cbo$cboincentives == 1] <- sample(c(rep(0, 26), rep(1, 149)))
      treatment.cbo$municipality_included[treatment.cbo$cboincentives == 0] <- sample(c(rep(0, 24), rep(1, 150)))
      treatment.cbo$communetype <- sample(c(rep("URBAINE", 47), rep("RURALE", 302)))
    } else {
      stop("Unknown datatype: ", datatype, call. = FALSE)
    }
    
    ## LOAD ACLED CONFLICT DATA
    # Stata file in long format, read with read.dta13
    acled <- here(datatype, "Source", "ACLED Data", "acled_long.dta") %>%
      read.dta13()
    
    ## LOAD DATA FROM EXPECTATIONS SURVEY
    # Anonymized CSV export; string columns are kept as character
    expectations <- here(datatype, "Source", "Expectations Survey", "CBO Expectations Survey Anonymized.csv") %>%
      read.csv(stringsAsFactors = FALSE)

# 3. Load user-written functions --------------- ######
    
    # Source every R script in the user-written functions folder.
    # list.files() returns all non-hidden entries, including subfolders and
    # non-R files, and source() fails on those; the pattern keeps only files
    # ending in .R / .r. Files are returned, and so sourced, in alphabetical order.
    list.files(
      here("Code", "Rcode - reproducible package", "0X - User-written functions"),
      pattern = "\\.[Rr]$",
      full.names = TRUE
    ) %>%
      walk(source)
    
# 4. Data cleaning and construction of outcome indicators  --------------- ######
    
    # Source every R script in the data cleaning and preparation folder.
    # list.files() returns all non-hidden entries, including subfolders and
    # non-R files, and source() fails on those; the pattern keeps only files
    # ending in .R / .r. Files are returned, and so sourced, in alphabetical
    # order, which is what sequences the cleaning steps.
    list.files(
      here("Code", "Rcode - reproducible package", "01-09 - Data Cleaning and Preparation"),
      pattern = "\\.[Rr]$",
      full.names = TRUE
    ) %>%
      walk(source)
    
# 5. Outputs: figures --------------- ######
    
    #Note# Code chunks are named after the name of the output they generate
    
    # Source every R script in the figures folder. list.files() returns all
    # non-hidden entries, including subfolders and non-R files, and source()
    # fails on those; the pattern keeps only files ending in .R / .r.
    list.files(
      here("Code", "Rcode - reproducible package", "10-13 - Analysis", "11 - Figures"),
      pattern = "\\.[Rr]$",
      full.names = TRUE
    ) %>%
      walk(source)
  
# 6. Outputs: tables --------------- ######
    
    #Note# Code chunks are named after the name of the output they generate
    
    # Source every R script in the tables folder (distributions & estimations).
    # list.files() returns all non-hidden entries, including subfolders and
    # non-R files, and source() fails on those; the pattern keeps only files
    # ending in .R / .r.
    list.files(
      here("Code", "Rcode - reproducible package", "10-13 - Analysis", "12 - Tables"),
      pattern = "\\.[Rr]$",
      full.names = TRUE
    ) %>%
      walk(source) #Distributions & estimations
    

    
# END --------------- ######
  
